options(java.parameters = "-Xmx5g")
library(haven)
library(tidyverse)
library(caret)
library(evalITR)
library(grf)
library(glmnet)
library(bartMachine)
students=read_sav("STAR_Students.sav")
set.seed(1)
studentrestricted=students
studentrestricted=studentrestricted%>%filter(gkclasstype %in% c(1,2),flagg3==1)
studentg3achievements=studentrestricted[c("g3treadss","g3tmathss","g3tlangss")]
studentothers=studentrestricted[,-c(grep("^gk",names(studentrestricted)),grep("^g[1-8]",names(studentrestricted)),
                                    grep("^hs",names(studentrestricted)))]
datalim=cbind(studentg3achievements,studentothers,studentrestricted$gkclasstype)
names(datalim)[length(names(datalim))]<-"classsize"
datalim=cbind(datalim,studentrestricted$gkschid)
names(datalim)[length(names(datalim))]<-"schid"
datalim<-datalim%>%select(g3treadss,g3tmathss,g3tlangss,stdntid,gender,race,birthmonth,birthyear,classsize,schid)
datalim=datalim[complete.cases(datalim),]
schools=read_sav("STAR_K-3_Schools.sav")
schools=schools%>%select(schid,SCHLURBN,GRDRANGE,GKENRMNT,GKFRLNCH,GKBUSED,GKWHITE)
schools$GKWHITE[is.na(schools$GKWHITE)]=0
schools=schools[complete.cases(schools),]
totalsetexp=left_join(datalim,schools)
totalsetexp=totalsetexp[complete.cases(totalsetexp),]
totalsetexp=totalsetexp%>%filter(birthyear>1978,race<3,birthyear<1981)

nfolds = 5
plim = 0.2
output = matrix(0, nrow = 9, ncol = 9)
set.seed(2020)
for (i in 1:3) {
  folds <- createFolds(totalsetexp$classsize,k=nfolds)
  taucvBART <- matrix(0, nrow = nrow(totalsetexp), ncol = nfolds)
  taucvCF <- matrix(0, nrow = nrow(totalsetexp), ncol = nfolds)
  taucvLASSO <- matrix(0, nrow = nrow(totalsetexp), ncol = nfolds)
  ThatcvBART <- matrix(0, nrow = nrow(totalsetexp), ncol = nfolds)
  ThatcvCF <- matrix(0, nrow = nrow(totalsetexp), ncol = nfolds)
  ThatcvLASSO <- matrix(0, nrow = nrow(totalsetexp), ncol = nfolds)
  ThatpcvBART <- matrix(0, nrow = nrow(totalsetexp), ncol = nfolds)
  ThatpcvCF <- matrix(0, nrow = nrow(totalsetexp), ncol = nfolds)
  ThatpcvLASSO <- matrix(0, nrow = nrow(totalsetexp), ncol = nfolds)
  Tcv <- -as.numeric(totalsetexp$classsize)+2
  Ycv <- as.numeric(totalsetexp[,i])
  indcv <- numeric(length(Ycv))
  for (j in 1:nfolds) {
    
    testset <- totalsetexp[folds[[j]], ]
    trainset <- totalsetexp[-folds[[j]], ]
    indcv[folds[[j]]] <- rep(j,nrow(testset))
    Y_train=as.numeric(unlist(trainset[,i]))
    X_train=trainset%>%select(-c(stdntid,schid,g3treadss,g3tmathss,g3tlangss,classsize))
    X_train$race[X_train$race>2]=3
    X_train$gender=as.factor(X_train$gender)
    X_train$race=as.factor(X_train$race)
    #X_train$birthmonth=as.factor(X_train$birthmonth)
    X_train$birthyear=as.factor(X_train$birthyear)
    X_train$SCHLURBN=as.factor(X_train$SCHLURBN)
    X_train$GRDRANGE=as.factor(X_train$GRDRANGE)
    X_train_expand=model.matrix(~.-1,data=X_train)
    T_train=-as.numeric(trainset$classsize)+2
    tau.forest<-causal_forest(X_train_expand,Y_train,T_train,ci.group.size = 1,sample.fraction = 0.8,num.trees = 2000)
    tau_train<-predict(tau.forest)
    
    X=cbind(X_train,T=T_train)
    
    ## BART Trees
    barc1<-bartMachine(X=X,y=Y_train,num_trees = 30)
    X0=cbind(X_train,T=0)
    X1=cbind(X_train,T=1)
    Y0<-predict(barc1,X0)
    Y1<-predict(barc1,X1)
    tau_train2<-Y1-Y0
    
    
    
    ## LASSO Model
    X_expand<-model.matrix(~.*T,data=X)
    ls1<-glmnet(X_expand,Y_train,alpha=1,lambda=0.05)
    
    
    
    
    ## Testing Data
    Y_total=as.numeric(unlist(totalsetexp[,i]))
    X_total=totalsetexp%>%select(-c(stdntid,schid,g3treadss,g3tmathss,g3tlangss,classsize))
    X_total$race[X_total$race>2]=3
    X_total$gender=as.factor(X_total$gender)
    X_total$race=as.factor(X_total$race)
    X_total$birthyear=as.factor(X_total$birthyear)
    X_total$SCHLURBN=as.factor(X_total$SCHLURBN)
    X_total$GRDRANGE=as.factor(X_total$GRDRANGE)
    X_total_expand=model.matrix(~.-1,data=X_total)
    T_total=-as.numeric(totalsetexp$classsize)+2
    
    Y_test=as.numeric(unlist(testset[,i]))
    X_test=testset%>%select(-c(stdntid,schid,g3treadss,g3tmathss,g3tlangss,classsize))
    X_test$race[X_test$race>2]=3
    X_test$gender=as.factor(X_test$gender)
    X_test$race=as.factor(X_test$race)
    X_test$birthyear=as.factor(X_test$birthyear)
    X_test$SCHLURBN=as.factor(X_test$SCHLURBN)
    X_test$GRDRANGE=as.factor(X_test$GRDRANGE)
    X_test_expand=model.matrix(~.-1,data=X_test)
    T_test=-as.numeric(testset$classsize)+2
    
    X0t_total<-cbind(X_total,T=0)
    X1t_total<-cbind(X_total,T=1)
    X0t_total_expand=model.matrix(~.*T,data=X0t_total)
    X1t_total_expand=model.matrix(~.*T,data=X1t_total)
    
    X0t_test<-cbind(X_test,T=0)
    X1t_test<-cbind(X_test,T=1)
    X0t_test_expand=model.matrix(~.*T,data=X0t_test)
    X1t_test_expand=model.matrix(~.*T,data=X1t_test)
    
    ## Causal Forest
    tau_total1<-predict(tau.forest,X_total_expand)$predictions + runif(nrow(totalsetexp),-1e-6,1e-6)
    tau_test1<-predict(tau.forest,X_test_expand)$predictions + runif(nrow(testset),-1e-6,1e-6)
    
    That1 = as.numeric(tau_total1>0)
    That1p = numeric(nrow(X_total))
    That1p = as.numeric(tau_total1 >= sort(tau_test1,decreasing = TRUE)[floor(plim*length(tau_test1))+1])
    taucvCF[,j]=tau_total1
    ThatcvCF[,j]=That1
    ThatpcvCF[,j]=That1p
    
    
    ## BART
    Y0t_total<-predict(barc1,X0t_total)
    Y1t_total<-predict(barc1,X1t_total)
    Y0t_test<-predict(barc1,X0t_test)
    Y1t_test<-predict(barc1,X1t_test)
    tau_total2<-Y1t_total-Y0t_total + runif(nrow(totalsetexp),-1e-6,1e-6)
    tau_test2<-Y1t_test-Y0t_test + runif(nrow(testset),-1e-6,1e-6)
    
    That2=as.numeric(tau_total2>0)
    That2p = numeric(nrow(X_total))
    That2p = as.numeric(tau_total2 >= sort(tau_test2,decreasing = TRUE)[floor(plim*length(tau_test2))+1])
    taucvBART[,j]=tau_total2
    ThatcvBART[,j]=That2
    ThatpcvBART[,j]=That2p    
    
    
    
    ## LASSO
    Y0t1_total<-predict(ls1,X0t_total_expand)
    Y1t1_total<-predict(ls1,X1t_total_expand)
    Y0t1_test<-predict(ls1,X0t_test_expand)
    Y1t1_test<-predict(ls1,X1t_test_expand)
    tau_total3<-Y1t1_total-Y0t1_total + runif(nrow(totalsetexp),-1e-6,1e-6)
    tau_test3<-Y1t1_test-Y0t1_test + runif(nrow(testset),-1e-6,1e-6)
    
    That3=as.numeric(tau_total3>0)
    That3p = numeric(nrow(X_total))
    That3p = as.numeric(tau_total3 >= sort(tau_test3,decreasing = TRUE)[floor(plim*length(tau_test3))+1])
    taucvLASSO[,j]=tau_total3
    ThatcvLASSO[,j]=That3
    ThatpcvLASSO[,j]=That3p    
  }
  PAPECF = PAPEcv(Tcv,ThatcvCF, Ycv, indcv)
  PAPEBART = PAPEcv(Tcv,ThatcvBART, Ycv, indcv)
  PAPELASSO = PAPEcv(Tcv,ThatcvLASSO, Ycv, indcv)
  PAPEpCF = PAPEcv(Tcv,ThatpcvCF, Ycv, indcv, plim)
  PAPEpBART = PAPEcv(Tcv,ThatpcvBART, Ycv, indcv, plim)
  PAPEpLASSO = PAPEcv(Tcv,ThatpcvLASSO, Ycv, indcv, plim)  
  PAPDpCFBART = PAPDcv(Tcv,ThatpcvCF, ThatpcvBART, Ycv, indcv, plim)
  PAPDpBARTLASSO = PAPDcv(Tcv,ThatpcvBART, ThatpcvLASSO, Ycv, indcv, plim)
  PAPDpCFLASSO = PAPDcv(Tcv,ThatpcvCF, ThatpcvLASSO, Ycv, indcv, plim)  
  
  output[i,1]=PAPECF$pape
  output[i,2]=PAPECF$sd
  output[i,3]=mean(sapply(1:max(indcv),function(i) mean(ThatcvCF[indcv==i, i])))
  output[i,4]=PAPEBART$pape
  output[i,5]=PAPEBART$sd
  output[i,6]=mean(sapply(1:max(indcv),function(i) mean(ThatcvBART[indcv==i, i])))
  output[i,7]=PAPELASSO$pape
  output[i,8]=PAPELASSO$sd
  output[i,9]=mean(sapply(1:max(indcv),function(i) mean(ThatcvLASSO[indcv==i, i])))
  output[i+3,1]=PAPEpCF$pape
  output[i+3,2]=PAPEpCF$sd
  output[i+3,3]=mean(sapply(1:max(indcv),function(i) mean(ThatpcvCF[indcv==i, i])))
  output[i+3,4]=PAPEpBART$pape
  output[i+3,5]=PAPEpBART$sd
  output[i+3,6]=mean(sapply(1:max(indcv),function(i) mean(ThatpcvBART[indcv==i, i])))
  output[i+3,7]=PAPEpLASSO$pape
  output[i+3,8]=PAPEpLASSO$sd
  output[i+3,9]=mean(sapply(1:max(indcv),function(i) mean(ThatpcvLASSO[indcv==i, i])))
  output[i+6,1]=PAPDpCFBART$papd
  output[i+6,2]=PAPDpCFBART$sd
  output[i+6,3]=0
  output[i+6,4]=PAPDpBARTLASSO$papd
  output[i+6,5]=PAPDpBARTLASSO$sd
  output[i+6,6]=0
  output[i+6,7]=PAPDpCFLASSO$papd
  output[i+6,8]=PAPDpCFLASSO$sd
  output[i+6,9]=0
  write.csv(taucvCF,paste0(c("cv_CF_" ,toString(i), ".csv"), collapse = ""))
  write.csv(taucvBART,paste0(c("cv_BART_" ,toString(i), ".csv"), collapse = ""))
  write.csv(taucvLASSO,paste0(c("cv_LASSO_" ,toString(i), ".csv"), collapse = ""))
}
write.csv(Tcv,"cv_treatments.csv")
write.csv(Ycv,"cv_outcomes.csv")
write.csv(indcv,"cv_folds.csv")
write.csv(output,"cv_output.csv")
